# Packages attached for this tutorial on missing values.
# The workspace is deliberately not wiped with rm(list = ls()): run the script
# in a fresh R session instead, so sourcing it never destroys existing objects.
.packages <- c(
  "car", "doBy", "lubridate", "VIM", "mi", "mice", "Amelia", "naniar", "dplyr"
)
# Compare against the installed package *names* (row names of the matrix),
# not against every cell of the installed.packages() matrix.
.inst <- .packages %in% rownames(installed.packages())
if (any(!.inst)) {
  install.packages(.packages[!.inst])
}
# library() stops with an error when a package cannot be attached, whereas
# require() only returns FALSE and lets later code fail in confusing ways.
invisible(lapply(.packages, library, character.only = TRUE))

1 Properties of Missing Values

NA = Not Available

vector with missing values

x <- c(1, 99, 3, NA, 5, 5, NA, 99, 3, 3, NA, 1, 3, 5, 1, 1 )
x < 3
##  [1]  TRUE FALSE FALSE    NA FALSE FALSE    NA FALSE FALSE FALSE    NA  TRUE
## [13] FALSE FALSE  TRUE  TRUE
x == 99
##  [1] FALSE  TRUE FALSE    NA FALSE FALSE    NA  TRUE FALSE FALSE    NA FALSE
## [13] FALSE FALSE FALSE FALSE

NA cannot be used in comparisons

x == NA 
##  [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

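Arithmetic with NA returns NA; logical operators return NA only when the result depends on the missing value (NA | TRUE is TRUE, NA | FALSE is NA)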

NA + 3
## [1] NA
NA | TRUE
## [1] TRUE
NA | FALSE
## [1] NA

na actions in functions

# sum() propagates NA unless missing values are dropped explicitly.
sum(x)
## [1] NA
# Spell out TRUE: T is an ordinary variable that can be reassigned.
sum(x, na.rm = TRUE)
## [1] 229

NaN = Not a Number

Inf = Infinity

-Inf = Negative Infinity

NA + NaN
## [1] NA
NaN + NA
## [1] NA

2 data structures with MVs

vector

x <- c(1, 99, 3, NA, 5, 5, NA, 99, 3, 3, NA, 1, 3, 5, 1, 1 )

data.frame with missing values:

# Example data sets used in the rest of the tutorial.
library(datasets)
# airquality has missing values in the Ozone and Solar.R columns.
data(airquality)
# NOTE(review): without a `package =` argument, data(sleep) may resolve to
# datasets::sleep, which has no Dream column; the use of sleep$Dream elsewhere
# suggests the mammal sleep data shipped with VIM is intended -- confirm.
data(sleep)

time series

library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
tsAirgap
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129  NA 135 148 148  NA 119 104 118
## 1950 115 126 141 135 125 149 170 170  NA 133  NA 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234  NA 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277  NA  NA  NA 374 413 405 355 306 271 306
## 1957 315 301 356 348 355  NA 465 467 404 347  NA 336
## 1958 340 318  NA 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362  NA
## 1960 417 391 419 461  NA 535 622 606 508 461 390 432

spatial

spatio-temporal

library(cutoffR)
#hqmr.data

3 detecting missing values

summary functions

summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
skimr::skim(airquality)
Data summary
Name airquality
Number of rows 153
Number of columns 6
_______________________
Column type frequency:
numeric 6
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Ozone 37 0.76 42.13 32.99 1.0 18.00 31.5 63.25 168.0 ▇▃▂▁▁
Solar.R 7 0.95 185.93 90.06 7.0 115.75 205.0 258.75 334.0 ▅▃▅▇▅
Wind 0 1.00 9.96 3.52 1.7 7.40 9.7 11.50 20.7 ▂▇▇▃▁
Temp 0 1.00 77.88 9.47 56.0 72.00 79.0 85.00 97.0 ▂▃▇▇▃
Month 0 1.00 6.99 1.42 5.0 6.00 7.0 8.00 9.0 ▇▇▇▇▇
Day 0 1.00 15.80 8.86 1.0 8.00 16.0 23.00 31.0 ▇▇▇▇▆
Hmisc::describe(airquality)
## airquality 
## 
##  6  Variables      153  Observations
## --------------------------------------------------------------------------------
## Ozone 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      116       37       67    0.999    42.13    35.28     7.75    11.00 
##      .25      .50      .75      .90      .95 
##    18.00    31.50    63.25    87.00   108.50 
## 
## lowest :   1   4   6   7   8, highest: 115 118 122 135 168
## --------------------------------------------------------------------------------
## Solar.R 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      146        7      117        1    185.9    102.7    24.25    47.50 
##      .25      .50      .75      .90      .95 
##   115.75   205.00   258.75   288.50   311.50 
## 
## lowest :   7   8  13  14  19, highest: 320 322 323 332 334
## --------------------------------------------------------------------------------
## Wind 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      153        0       31    0.997    9.958    3.964     4.60     5.82 
##      .25      .50      .75      .90      .95 
##     7.40     9.70    11.50    14.90    15.50 
## 
## lowest :  1.7  2.3  2.8  3.4  4.0, highest: 16.1 16.6 18.4 20.1 20.7
## --------------------------------------------------------------------------------
## Temp 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      153        0       40    0.999    77.88    10.74     60.2     64.2 
##      .25      .50      .75      .90      .95 
##     72.0     79.0     85.0     90.0     92.0 
## 
## lowest : 56 57 58 59 61, highest: 92 93 94 96 97
## --------------------------------------------------------------------------------
## Month 
##        n  missing distinct     Info     Mean      Gmd 
##      153        0        5     0.96    6.993    1.608 
## 
## lowest : 5 6 7 8 9, highest: 5 6 7 8 9
##                                         
## Value          5     6     7     8     9
## Frequency     31    30    31    31    30
## Proportion 0.203 0.196 0.203 0.203 0.196
## --------------------------------------------------------------------------------
## Day 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      153        0       31    0.999     15.8    10.26      2.0      4.0 
##      .25      .50      .75      .90      .95 
##      8.0     16.0     23.0     28.0     29.4 
## 
## lowest :  1  2  3  4  5, highest: 27 28 29 30 31
## --------------------------------------------------------------------------------

logical

base::is.na(), anyNA()
naniar::is_na(), any_na(), any_miss(), any_complete()
is.na(x)
##  [1] FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE
## [13] FALSE FALSE FALSE FALSE
is.na(airquality)
##        Ozone Solar.R  Wind  Temp Month   Day
##   [1,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [2,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [3,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [4,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [5,]  TRUE    TRUE FALSE FALSE FALSE FALSE
##   [6,] FALSE    TRUE FALSE FALSE FALSE FALSE
##   [7,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [8,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [9,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [10,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [11,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [12,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [13,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [14,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [15,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [16,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [17,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [18,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [19,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [20,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [21,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [22,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [23,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [24,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [25,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [26,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [27,]  TRUE    TRUE FALSE FALSE FALSE FALSE
##  [28,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [29,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [30,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [31,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [32,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [33,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [34,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [35,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [36,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [37,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [38,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [39,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [40,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [41,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [42,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [43,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [44,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [45,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [46,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [47,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [48,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [49,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [50,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [51,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [52,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [53,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [54,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [55,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [56,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [57,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [58,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [59,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [60,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [61,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [62,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [63,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [64,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [65,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [66,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [67,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [68,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [69,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [70,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [71,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [72,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [73,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [74,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [75,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [76,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [77,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [78,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [79,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [80,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [81,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [82,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [83,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [84,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [85,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [86,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [87,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [88,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [89,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [90,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [91,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [92,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [93,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [94,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [95,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [96,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [97,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [98,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [99,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [102,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [103,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [107,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [115,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [119,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [141,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [142,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [143,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [144,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [145,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [146,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [147,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [148,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [149,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [150,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [151,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [152,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [153,] FALSE   FALSE FALSE FALSE FALSE FALSE
is.na(tsAirgap)
##   [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [109] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [133] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
naniar::any_na(airquality)
## [1] TRUE
complete.cases(airquality)
##   [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [73]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [97] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [109]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE

numerical summaries

VIM::countNA(airquality)
## [1] 44
naniar::any_na(airquality)
## [1] TRUE
naniar::any_miss(airquality)
## [1] TRUE
naniar::n_miss(airquality)
## [1] 44
naniar::n_complete(airquality)
## [1] 874

4 Recoding values to missing

When dealing with missing values, you might want to replace certain values with missing values (NA). This is useful when you know the origin of the data and can be certain which values stand for missing data. For example, you might know that all values of “N/A”, “N A”, and “Not Available”, or -99, or -1 are supposed to be missing.

# Recode the sentinel value 99 as missing using logical indexing.
x[x == 99] <- NA
x
##  [1]  1 NA  3 NA  5  5 NA NA  3  3 NA  1  3  5  1  1
# Same idea with the replacement form of is.na(): every position where x
# equals 5 is set to NA.
is.na(x)<-which(x==5)
x
##  [1]  1 NA  3 NA NA NA NA NA  3  3 NA  1  3 NA  1  1

tidyr::replace_na(): a missing value turns into a regular value (NA –> -99)

# replace_na() needs the replacement value as its second argument; called
# without it, the vector is returned unchanged and no NA is replaced.
tidyr::replace_na(x, -99)
##  [1]   1 -99   3 -99 -99 -99 -99 -99   3   3 -99   1   3 -99   1   1

dplyr

dplyr::na_if()

naniar::replace_with_na(): a value becomes a missing value (-99 –> NA); works on a data.frame

replace_with_na(data, replace = list(), ...)
replace_with_na_all()
replace_with_na_at()
replace_with_na_if()

5 Detect missing values

base::is.na(), is.na.data.frame(), is.na.numeric_version(), is.na.POSIXlt()
naniar::any_na(), any_miss()
stats::complete.cases()
naniar::n_miss(), n_complete(), pct_miss()
naniar::any_na(NaN)
## [1] TRUE
naniar::any_na(NULL)
## [1] FALSE
naniar::any_na(Inf)
## [1] FALSE
x
##  [1]  1 NA  3 NA NA NA NA NA  3  3 NA  1  3 NA  1  1
complete.cases(x) # TRUE for complete (non-missing) entries
##  [1]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE
## [13]  TRUE FALSE  TRUE  TRUE
is.na(x)
##  [1] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [13] FALSE  TRUE FALSE FALSE
!complete.cases(x)  
##  [1] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [13] FALSE  TRUE FALSE FALSE
naniar::n_miss(x)
## [1] 8
naniar::n_complete(x)
## [1] 8
naniar::pct_miss(x)
## [1] 50
naniar::pct_complete(x)
## [1] 50
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
dim(airquality)
## [1] 153   6

rows with no missing data

airquality[complete.cases(airquality), ]
dim(airquality[complete.cases(airquality), ])
## [1] 111   6

rows with at least one missing value

airquality[!complete.cases(airquality), ]
dim(airquality[!complete.cases(airquality), ])
## [1] 42  6

Because logical values have the numeric values 1 and 0, sum() and mean() can be used to count missing values and compute their proportion:

sum(is.na(airquality$Ozone)) # number of missing observations in Ozone
## [1] 37
VIM::countNA(airquality$Ozone) # the same count via the VIM package
## [1] 37
# Load the mammal sleep data from VIM explicitly: datasets::sleep has no
# Dream column, so sleep$Dream would be NULL and the proportion would be NaN.
data(sleep, package = "VIM")
mean(is.na(sleep$Dream)) # proportion of missing observations in Dream
## [1] 0.1935484
mean(!complete.cases(sleep)) # proportion of rows with at least one missing value
## [1] 0.3225806

6 missing data in arguments

na.omit, na.exclude, na.pass, na.fail, na.action, na.rm, na.last, useNA
mean(airquality$Ozone)
## [1] NA
mean(airquality$Ozone,na.rm=TRUE)
## [1] 42.12931
mean(na.omit(airquality$Ozone))
## [1] 42.12931
na.omit(airquality$Ozone)
##   [1]  41  36  12  18  28  23  19   8   7  16  11  14  18  14  34   6  30  11
##  [19]   1  11   4  32  23  45 115  37  29  71  39  23  21  37  20  12  13 135
##  [37]  49  32  64  40  77  97  97  85  10  27   7  48  35  61  79  63  16  80
##  [55] 108  20  52  82  50  64  59  39   9  16  78  35  66 122  89 110  44  28
##  [73]  65  22  59  23  31  44  21   9  45 168  73  76 118  84  85  96  78  73
##  [91]  91  47  32  20  23  21  24  44  21  28   9  13  46  18  13  24  16  13
## [109]  23  36   7  14  30  14  18  20
## attr(,"na.action")
##  [1]   5  10  25  26  27  32  33  34  35  36  37  39  42  43  45  46  52  53  54
## [20]  55  56  57  58  59  60  61  65  72  75  83  84 102 103 107 115 119 150
## attr(,"class")
## [1] "omit"
airquality$Ozone
##   [1]  41  36  12  18  NA  28  23  19   8  NA   7  16  11  14  18  14  34   6
##  [19]  30  11   1  11   4  32  NA  NA  NA  23  45 115  37  NA  NA  NA  NA  NA
##  [37]  NA  29  NA  71  39  NA  NA  23  NA  NA  21  37  20  12  13  NA  NA  NA
##  [55]  NA  NA  NA  NA  NA  NA  NA 135  49  32  NA  64  40  77  97  97  85  NA
##  [73]  10  27  NA   7  48  35  61  79  63  16  NA  NA  80 108  20  52  82  50
##  [91]  64  59  39   9  16  78  35  66 122  89 110  NA  NA  44  28  65  NA  22
## [109]  59  23  31  44  21   9  NA  45 168  73  NA  76 118  84  85  96  78  73
## [127]  91  47  32  20  23  21  24  44  21  28   9  13  46  18  13  24  16  13
## [145]  23  36   7  14  30  NA  14  18  20
na.omit(airquality)

returns the object with observations removed if they contain any missing values

na.exclude(airquality) 

differences between omitting and excluding NAs can be seen in some prediction and residual functions

# na.fail() signals an error when the object contains missing values, and
# airquality does. Catch the error so the demonstration prints the message
# instead of aborting the rest of the script.
tryCatch(
  na.fail(airquality),
  error = function(e) conditionMessage(e)
)

returns the object only if it contains no missing values

na.pass(airquality) 

returns the object unchanged

7 omit vs exclude

# Fit the same regression of Ozone on Solar.R twice; the two fits differ only
# in the na.action used for rows with missing values.
omit.model<- lm(Ozone ~ Solar.R, data = airquality, na.action = na.omit)
exclude.model<- lm(Ozone ~ Solar.R, data = airquality, na.action = na.exclude)
omit.model
## 
## Call:
## lm(formula = Ozone ~ Solar.R, data = airquality, na.action = na.omit)
## 
## Coefficients:
## (Intercept)      Solar.R  
##     18.5987       0.1272
exclude.model
## 
## Call:
## lm(formula = Ozone ~ Solar.R, data = airquality, na.action = na.exclude)
## 
## Coefficients:
## (Intercept)      Solar.R  
##     18.5987       0.1272
resid(omit.model)
##           1           2           3           4           7           8 
##  -1.7601294   2.3957702 -25.5463532 -40.4014578 -33.6211440 -12.1880897 
##           9          12          13          14          15          16 
## -13.0148679 -35.1530373 -44.4766565 -39.4420122  -8.8644704 -47.0719285 
##          17          18          19          20          21          22 
## -23.6384662 -22.5176190 -29.5459452 -13.1939997 -18.6160499 -48.2916147 
##          23          24          28          29          30          31 
## -17.7778596   1.7020672   2.7481237  -5.6443762  68.0434167 -17.0778386 
##          38          40          41          44          47          48 
##  -5.7487173  15.3961782 -20.6731105 -14.4191880 -21.8872947 -17.7136649 
##          49          50          51          62          63          64 
##  -3.3038428 -21.8585604 -23.0203700  82.1938142  -1.1357151 -16.6097319 
##          66          67          68          69          70          71 
##  23.1473497 -18.5286231  23.3036573  44.4481447  43.8123183  44.1473497 
##          73          74          76          77          78          79 
## -42.1703595 -13.8526503 -17.7026608  -3.6616984 -18.4420122   6.1591698 
##          80          81          82          85          86          87 
##  36.6213664  16.4249125  -3.4888847  24.0146824  61.0434167  -8.8991148 
##          88          89          90          91          92          93 
##  22.9737200  36.3150694  -3.5691775  13.2284585   8.1012932   9.8465547 
##          94          95          99         100         101         104 
## -12.6506943 -12.3904537  70.9741280  41.2804250  65.0780610   0.9855401 
##         105         106         108         109         110         111 
## -25.3148469  26.4363246  -5.6274621  33.9158434 -10.2227340 -18.6270541 
##         112         113         114         116         117         118 
##   1.2398706 -30.5345331 -14.1766776  -0.5577654 119.1359376  27.0607388 
##         120         121         122         123         124         125 
##  31.5867221  70.7890861  35.2631028  42.4942012  56.1646719  34.3497137 
##         126         127         128         129         130         131 
##  31.1300275  48.3670359  16.3205714   1.7020672 -30.6443762 -23.5750875 
##         132         133         134         135         136         137 
## -26.8467403 -27.5345331  -4.6097319 -30.5345331 -20.8640624 -12.6506943 
##         138         139         140         141         142         143 
## -19.8412382  -2.7368972 -29.0837486  -9.0321901 -24.8640624 -28.1589474 
##         144         145         146         147         148         149 
## -35.8640624   2.6209584  -0.2747005 -17.8298261  -7.1420332 -13.1416252 
##         151         152         153 
## -28.8872947 -17.2573784 -26.9565833
resid(exclude.model)
##           1           2           3           4           5           6 
##  -1.7601294   2.3957702 -25.5463532 -40.4014578          NA          NA 
##           7           8           9          10          11          12 
## -33.6211440 -12.1880897 -13.0148679          NA          NA -35.1530373 
##          13          14          15          16          17          18 
## -44.4766565 -39.4420122  -8.8644704 -47.0719285 -23.6384662 -22.5176190 
##          19          20          21          22          23          24 
## -29.5459452 -13.1939997 -18.6160499 -48.2916147 -17.7778596   1.7020672 
##          25          26          27          28          29          30 
##          NA          NA          NA   2.7481237  -5.6443762  68.0434167 
##          31          32          33          34          35          36 
## -17.0778386          NA          NA          NA          NA          NA 
##          37          38          39          40          41          42 
##          NA  -5.7487173          NA  15.3961782 -20.6731105          NA 
##          43          44          45          46          47          48 
##          NA -14.4191880          NA          NA -21.8872947 -17.7136649 
##          49          50          51          52          53          54 
##  -3.3038428 -21.8585604 -23.0203700          NA          NA          NA 
##          55          56          57          58          59          60 
##          NA          NA          NA          NA          NA          NA 
##          61          62          63          64          65          66 
##          NA  82.1938142  -1.1357151 -16.6097319          NA  23.1473497 
##          67          68          69          70          71          72 
## -18.5286231  23.3036573  44.4481447  43.8123183  44.1473497          NA 
##          73          74          75          76          77          78 
## -42.1703595 -13.8526503          NA -17.7026608  -3.6616984 -18.4420122 
##          79          80          81          82          83          84 
##   6.1591698  36.6213664  16.4249125  -3.4888847          NA          NA 
##          85          86          87          88          89          90 
##  24.0146824  61.0434167  -8.8991148  22.9737200  36.3150694  -3.5691775 
##          91          92          93          94          95          96 
##  13.2284585   8.1012932   9.8465547 -12.6506943 -12.3904537          NA 
##          97          98          99         100         101         102 
##          NA          NA  70.9741280  41.2804250  65.0780610          NA 
##         103         104         105         106         107         108 
##          NA   0.9855401 -25.3148469  26.4363246          NA  -5.6274621 
##         109         110         111         112         113         114 
##  33.9158434 -10.2227340 -18.6270541   1.2398706 -30.5345331 -14.1766776 
##         115         116         117         118         119         120 
##          NA  -0.5577654 119.1359376  27.0607388          NA  31.5867221 
##         121         122         123         124         125         126 
##  70.7890861  35.2631028  42.4942012  56.1646719  34.3497137  31.1300275 
##         127         128         129         130         131         132 
##  48.3670359  16.3205714   1.7020672 -30.6443762 -23.5750875 -26.8467403 
##         133         134         135         136         137         138 
## -27.5345331  -4.6097319 -30.5345331 -20.8640624 -12.6506943 -19.8412382 
##         139         140         141         142         143         144 
##  -2.7368972 -29.0837486  -9.0321901 -24.8640624 -28.1589474 -35.8640624 
##         145         146         147         148         149         150 
##   2.6209584  -0.2747005 -17.8298261  -7.1420332 -13.1416252          NA 
##         151         152         153 
## -28.8872947 -17.2573784 -26.9565833
# resid(omit.model) has one value per complete row, while resid(exclude.model)
# is padded with NA to one value per original row. The two vectors therefore
# differ in length and data.frame() raises an error; catch it so the message
# is shown without stopping the script.
tryCatch(
  data.frame(resid(omit.model), resid(exclude.model)),
  error = function(e) conditionMessage(e)
)
fitted(omit.model)
##        1        2        3        4        7        8        9       12 
## 42.76013 33.60423 37.54635 58.40146 56.62114 31.18809 21.01487 51.15304 
##       13       14       15       16       17       18       19       20 
## 55.47666 53.44201 26.86447 61.07193 57.63847 28.51762 59.54595 24.19400 
##       21       22       23       24       28       29       30       31 
## 19.61605 59.29161 21.77786 30.29793 20.25188 50.64438 46.95658 54.07784 
##       38       40       41       44       47       48       49       50 
## 34.74872 55.60382 59.67311 37.41919 42.88729 54.71366 23.30384 33.85856 
##       51       62       63       64       66       67       68       69 
## 36.02037 52.80619 50.13572 48.60973 40.85265 58.52862 53.69634 52.55186 
##       70       71       73       74       76       77       78       79 
## 53.18768 40.85265 52.17036 40.85265 24.70266 51.66170 53.44201 54.84083 
##       80       81       82       85       86       87       88       89 
## 42.37863 46.57509 19.48888 55.98532 46.95658 28.89911 29.02628 45.68493 
##       90       91       92       93       94       95       99      100 
## 53.56918 50.77154 50.89871 29.15345 21.65069 28.39045 51.02587 47.71957 
##      101      104      105      106      108      109      110      111 
## 44.92194 43.01446 53.31485 38.56368 27.62746 25.08416 33.22273 49.62705 
##      112      113      114      116      117      118      120      121 
## 42.76013 51.53453 23.17668 45.55777 48.86406 45.93926 44.41328 47.21091 
##      122      123      124      125      126      127      128      129 
## 48.73690 42.50580 39.83533 43.65029 41.86997 42.63296 30.67943 30.29793 
##      130      131      132      133      134      135      136      137 
## 50.64438 46.57509 47.84674 51.53453 48.60973 51.53453 48.86406 21.65069 
##      138      139      140      141      142      143      144      145 
## 32.84124 48.73690 47.08375 22.03219 48.86406 44.15895 48.86406 20.37904 
##      146      147      148      149      151      152      153 
## 36.27470 24.82983 21.14203 43.14163 42.88729 35.25738 46.95658
fitted(exclude.model)
##        1        2        3        4        5        6        7        8 
## 42.76013 33.60423 37.54635 58.40146       NA       NA 56.62114 31.18809 
##        9       10       11       12       13       14       15       16 
## 21.01487       NA       NA 51.15304 55.47666 53.44201 26.86447 61.07193 
##       17       18       19       20       21       22       23       24 
## 57.63847 28.51762 59.54595 24.19400 19.61605 59.29161 21.77786 30.29793 
##       25       26       27       28       29       30       31       32 
##       NA       NA       NA 20.25188 50.64438 46.95658 54.07784       NA 
##       33       34       35       36       37       38       39       40 
##       NA       NA       NA       NA       NA 34.74872       NA 55.60382 
##       41       42       43       44       45       46       47       48 
## 59.67311       NA       NA 37.41919       NA       NA 42.88729 54.71366 
##       49       50       51       52       53       54       55       56 
## 23.30384 33.85856 36.02037       NA       NA       NA       NA       NA 
##       57       58       59       60       61       62       63       64 
##       NA       NA       NA       NA       NA 52.80619 50.13572 48.60973 
##       65       66       67       68       69       70       71       72 
##       NA 40.85265 58.52862 53.69634 52.55186 53.18768 40.85265       NA 
##       73       74       75       76       77       78       79       80 
## 52.17036 40.85265       NA 24.70266 51.66170 53.44201 54.84083 42.37863 
##       81       82       83       84       85       86       87       88 
## 46.57509 19.48888       NA       NA 55.98532 46.95658 28.89911 29.02628 
##       89       90       91       92       93       94       95       96 
## 45.68493 53.56918 50.77154 50.89871 29.15345 21.65069 28.39045       NA 
##       97       98       99      100      101      102      103      104 
##       NA       NA 51.02587 47.71957 44.92194       NA       NA 43.01446 
##      105      106      107      108      109      110      111      112 
## 53.31485 38.56368       NA 27.62746 25.08416 33.22273 49.62705 42.76013 
##      113      114      115      116      117      118      119      120 
## 51.53453 23.17668       NA 45.55777 48.86406 45.93926       NA 44.41328 
##      121      122      123      124      125      126      127      128 
## 47.21091 48.73690 42.50580 39.83533 43.65029 41.86997 42.63296 30.67943 
##      129      130      131      132      133      134      135      136 
## 30.29793 50.64438 46.57509 47.84674 51.53453 48.60973 51.53453 48.86406 
##      137      138      139      140      141      142      143      144 
## 21.65069 32.84124 48.73690 47.08375 22.03219 48.86406 44.15895 48.86406 
##      145      146      147      148      149      150      151      152 
## 20.37904 36.27470 24.82983 21.14203 43.14163       NA 42.88729 35.25738 
##      153 
## 46.95658

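Both na.omit and na.exclude drop the rows with missing values when the model is fitted. The difference is that na.exclude keeps the positions of those rows, so the residuals and fitted values are padded with NA, whereas na.omit returns only the rows that were actually used.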

summary(airquality$Ozone)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   31.50   42.13   63.25  168.00      37
# Frequency table of Ozone; useNA = "no" is table()'s default, so NAs are not counted.
table(airquality[["Ozone"]], useNA = "no")
## 
##   1   4   6   7   8   9  10  11  12  13  14  16  18  19  20  21  22  23  24  27 
##   1   1   1   3   1   3   1   3   2   4   4   4   4   1   4   4   1   6   2   1 
##  28  29  30  31  32  34  35  36  37  39  40  41  44  45  46  47  48  49  50  52 
##   3   1   2   1   3   1   2   2   2   2   1   1   3   2   1   1   1   1   1   1 
##  59  61  63  64  65  66  71  73  76  77  78  79  80  82  84  85  89  91  96  97 
##   2   1   1   2   1   1   1   2   1   1   2   1   1   1   1   2   1   1   1   2 
## 108 110 115 118 122 135 168 
##   1   1   1   1   1   1   1
# Same table, with an <NA> column added only because Ozone contains NAs.
table(airquality[["Ozone"]], useNA = "ifany")
## 
##    1    4    6    7    8    9   10   11   12   13   14   16   18   19   20   21 
##    1    1    1    3    1    3    1    3    2    4    4    4    4    1    4    4 
##   22   23   24   27   28   29   30   31   32   34   35   36   37   39   40   41 
##    1    6    2    1    3    1    2    1    3    1    2    2    2    2    1    1 
##   44   45   46   47   48   49   50   52   59   61   63   64   65   66   71   73 
##    3    2    1    1    1    1    1    1    2    1    1    2    1    1    1    2 
##   76   77   78   79   80   82   84   85   89   91   96   97  108  110  115  118 
##    1    1    2    1    1    1    1    2    1    1    1    2    1    1    1    1 
##  122  135  168 <NA> 
##    1    1    1   37
# Same table, with the <NA> column always present (even if its count were 0).
table(airquality[["Ozone"]], useNA = "always")
## 
##    1    4    6    7    8    9   10   11   12   13   14   16   18   19   20   21 
##    1    1    1    3    1    3    1    3    2    4    4    4    4    1    4    4 
##   22   23   24   27   28   29   30   31   32   34   35   36   37   39   40   41 
##    1    6    2    1    3    1    2    1    3    1    2    2    2    2    1    1 
##   44   45   46   47   48   49   50   52   59   61   63   64   65   66   71   73 
##    3    2    1    1    1    1    1    1    2    1    1    2    1    1    1    2 
##   76   77   78   79   80   82   84   85   89   91   96   97  108  110  115  118 
##    1    1    2    1    1    1    1    2    1    1    1    2    1    1    1    1 
##  122  135  168 <NA> 
##    1    1    1   37
length(airquality$Ozone)
## [1] 153
# sort() discards NA by default (na.last = NA), so x1 holds only the observed values.
x1 <- sort(airquality[["Ozone"]], na.last = NA)
x1
##   [1]   1   4   6   7   7   7   8   9   9   9  10  11  11  11  12  12  13  13
##  [19]  13  13  14  14  14  14  16  16  16  16  18  18  18  18  19  20  20  20
##  [37]  20  21  21  21  21  22  23  23  23  23  23  23  24  24  27  28  28  28
##  [55]  29  30  30  31  32  32  32  34  35  35  36  36  37  37  39  39  40  41
##  [73]  44  44  44  45  45  46  47  48  49  50  52  59  59  61  63  64  64  65
##  [91]  66  71  73  73  76  77  78  78  79  80  82  84  85  85  89  91  96  97
## [109]  97 108 110 115 118 122 135 168
length(x1)
## [1] 116
# na.last = TRUE keeps the NAs and places them after the sorted observed values.
x2 <- sort(airquality[["Ozone"]], decreasing = FALSE, na.last = TRUE)
x2
##   [1]   1   4   6   7   7   7   8   9   9   9  10  11  11  11  12  12  13  13
##  [19]  13  13  14  14  14  14  16  16  16  16  18  18  18  18  19  20  20  20
##  [37]  20  21  21  21  21  22  23  23  23  23  23  23  24  24  27  28  28  28
##  [55]  29  30  30  31  32  32  32  34  35  35  36  36  37  37  39  39  40  41
##  [73]  44  44  44  45  45  46  47  48  49  50  52  59  59  61  63  64  64  65
##  [91]  66  71  73  73  76  77  78  78  79  80  82  84  85  85  89  91  96  97
## [109]  97 108 110 115 118 122 135 168  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [127]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## [145]  NA  NA  NA  NA  NA  NA  NA  NA  NA
length(x2)
## [1] 153

7.0.1 missing data oruntusu

mice::md.pattern(airquality) # md.pattern() comes from the mice package

##     Wind Temp Month Day Solar.R Ozone   
## 111    1    1     1   1       1     1  0
## 35     1    1     1   1       1     0  1
## 5      1    1     1   1       0     1  1
## 2      1    1     1   1       0     0  2
##        0    0     0   0       7    37 44

8 missing data gorsellestirme

9 VIM paketi

Aggregations for missing/imputed values: calculate or plot the amount of missing/imputed values in each variable and the amount of missing/imputed values in certain combinations of variables.

aggr(x, delimiter = NULL, plot = TRUE, ...)

## S3 method for class 'aggr'
plot(x, col = c("skyblue", "red", "orange"), bars = TRUE,
  numbers = FALSE, prop = TRUE, combined = FALSE, varheight = FALSE,
  only.miss = FALSE, border = par("fg"), sortVars = FALSE,
  sortCombs = TRUE, ylabs = NULL, axes = TRUE, labels = axes,
  cex.lab = 1.2, cex.axis = par("cex"), cex.numbers = par("cex"),
  gap = 4, ...)
# aggr() comes from the VIM package; the result is kept so it can be summarised below.
a <- VIM::aggr(airquality, prop = FALSE, numbers = TRUE)

# Missing-data pattern: counts per variable and per combination of variables.
summary(a)
## 
##  Missings per variable: 
##  Variable Count
##     Ozone    37
##   Solar.R     7
##      Wind     0
##      Temp     0
##     Month     0
##       Day     0
## 
##  Missings in combinations of variables: 
##  Combinations Count   Percent
##   0:0:0:0:0:0   111 72.549020
##   0:1:0:0:0:0     5  3.267974
##   1:0:0:0:0:0    35 22.875817
##   1:1:0:0:0:0     2  1.307190

matrix plot

Create a matrix plot, in which all cells of a data matrix are visualized by rectangles. Available data is coded according to a continuous color scheme, while missing/imputed data is visualized by a clearly distinguishable color.

matrixplot(x, delimiter = NULL, sortby = NULL, col = c("red", "orange"),
  fixup = TRUE, xlim = NULL, ylim = NULL, main = NULL,
  sub = NULL, xlab = NULL, ylab = NULL, axes = TRUE, labels = axes,
  xpd = NULL, interactive = TRUE, ...)
VIM::matrixplot(airquality)

## 
## Click in a column to sort by the corresponding variable.
## To regain use of the VIM GUI and the R console, click outside the plot region.
# Same matrix plot, with the rows pre-sorted by the Ozone column.
VIM::matrixplot(airquality, interactive = TRUE, sortby = "Ozone") #!

## 
## Click in a column to sort by the corresponding variable.
## To regain use of the VIM GUI and the R console, click outside the plot region.

Scatterplot with additional information in the margins: in addition to a standard scatterplot, information about missing/imputed values is shown in the plot margins. Furthermore, imputed values are highlighted in the scatterplot.

marginplot(x, delimiter = NULL, col = c("skyblue", "red", "red4", "orange",
  "orange4"), alpha = NULL, pch = c(1, 16), cex = par("cex"),
  numbers = TRUE, cex.numbers = par("cex"), zeros = FALSE, xlim = NULL,
  ylim = NULL, main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
  ann = par("ann"), axes = TRUE, frame.plot = axes, ...)
# Ozone vs. Solar.R with the default symbols and colours.
VIM::marginplot(airquality[, c("Ozone", "Solar.R")])

# Same two variables with solid points and a custom three-colour palette.
VIM::marginplot(
  airquality[, c("Ozone", "Solar.R")],
  pch = 20,
  col = c("darkgray", "red", "blue")
)

Marginplot matrix: create a scatterplot matrix with information about missing/imputed values in the plot margins of each panel.

marginmatrix(x, delimiter = NULL, col = c("skyblue", "red", "red4",
  "orange", "orange4"), alpha = NULL, ...)
# Margin-plot matrix of every variable except Month (the fifth column).
VIM::marginmatrix(airquality[, names(airquality) != "Month"])

Barplot with information about missing/imputed values: barplot with highlighting of missing/imputed values in other variables by splitting each bar into two parts. Additionally, information about missing/imputed values in the variable of interest is shown on the right hand side.

barMiss(x, delimiter = NULL, pos = 1, selection = c("any", "all"),
  col = c("skyblue", "red", "skyblue4", "red4", "orange", "orange4"),
  border = NULL, main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
  axes = TRUE, labels = axes, only.miss = TRUE, miss.labels = axes,
  interactive = TRUE, ...)
VIM::barMiss(airquality[,c("Month","Ozone")])

## 
## Click in in the left margin to switch to the previous variable or in the right margin to switch to the next variable.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
airquality[,c("Month","Ozone")] # print the two columns to inspect the data behind the plot

Rug representation of missing/imputed values: add a rug representation of missing/imputed values in only one of the variables to scatterplots.

rugNA(x, y, ticksize = NULL, side = 1, col = "red", alpha = NULL,
  miss = NULL, lwd = 0.5, ...)
# Base scatterplot of Ozone against Solar.R; the rugs below are added to it.
plot(airquality$Ozone, airquality$Solar.R)
# side = 1: marks the missing values of the y variable along the x axis.
VIM::rugNA(airquality$Ozone, airquality$Solar.R,side=1)
# side = 2 with full-length orange ticks. Adding the `miss` argument lets
# imputed values be shown instead of missing ones; see ?rugNA.
VIM::rugNA(airquality$Ozone, airquality$Solar.R,ticksize = 1, col= "orange", side=2)

Scatterplot matrix with information about missing/imputed values: scatterplot matrix in which observations with missing/imputed values in certain variables are highlighted.

scattmatrixMiss(x, delimiter = NULL, highlight = NULL,
  selection = c("any", "all"), plotvars = NULL, col = c("skyblue", "red",
  "orange"), alpha = NULL, pch = c(1, 3), lty = par("lty"),
  diagonal = c("density", "none"), interactive = TRUE, ...)
# With no `highlight` given, missings in any of the variables are highlighted.
# The `delimiter` argument is used for imputed values; see ?scattmatrixMiss.
VIM::scattmatrixMiss(airquality)

## 
## Click in a diagonal panel to add to or remove from the highlight selection.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
## 
## Highlighted missings in any of the variables 'Ozone', 'Solar.R', 'Wind', 'Temp', 'Month', 'Day'.
VIM::scattmatrixMiss(airquality, highlight = "Ozone")

## 
## Click in a diagonal panel to add to or remove from the highlight selection.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
## 
## Highlighted 'missings' in variable 'Ozone'.

Parallel boxplots with information about missing/imputed values: boxplot of one variable of interest plus information about missing/imputed values in other variables.

pbox(x, delimiter = NULL, pos = 1, selection = c("none", "any", "all"),
  col = c("skyblue", "red", "red4", "orange", "orange4"), numbers = TRUE,
  cex.numbers = par("cex"), xlim = NULL, ylim = NULL, main = NULL,
  sub = NULL, xlab = NULL, ylab = NULL, axes = TRUE,
  frame.plot = axes, labels = axes, interactive = TRUE, ...)
VIM::pbox(airquality) # parallel boxplots for observed and missing values
## Warning in createPlot(main, sub, xlab, ylab, labels, ca$at): not enough space to
## display frequencies

## 
## Click in in the left margin to switch to the previous variable or in the right margin to switch to the next variable.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.

Parallel coordinate plot with information about missing/imputed values: parallel coordinate plot with adjustments for missing/imputed values. Missing values in the plotted variables may be represented by a point above the corresponding coordinate axis to prevent disconnected lines. In addition, observations with missing/imputed values in selected variables may be highlighted.

parcoordMiss(x, delimiter = NULL, highlight = NULL, selection = c("any",
  "all"), plotvars = NULL, plotNA = TRUE, col = c("skyblue", "red",
  "skyblue4", "red4", "orange", "orange4"), alpha = NULL, lty = par("lty"),
  xlim = NULL, ylim = NULL, main = NULL, sub = NULL, xlab = NULL,
  ylab = NULL, labels = TRUE, xpd = NULL, interactive = TRUE, ...)
VIM::parcoordMiss(airquality) # parallel coordinate plot

## 
## Click on a coordinate axis to add to or remove from the highlight selection.
## Click in the top margin to toggle visualizing missing  values in the plot variables.
## To regain use of the VIM GUI and the R console, click in any of the other plot margins.
## 
## Highlighted missings in any of the variables 'Ozone', 'Solar.R', 'Wind', 'Temp', 'Month', 'Day'.

Bivariate jitter plot: create a bivariate jitter plot.

scattJitt(x, delimiter = NULL, col = c("skyblue", "red", "red4", "orange",
  "orange4"), alpha = NULL, cex = par("cex"), col.line = "lightgrey",
  lty = "dashed", lwd = par("lwd"), numbers = TRUE,
  cex.numbers = par("cex"), main = NULL, sub = NULL, xlab = NULL,
  ylab = NULL, axes = TRUE, frame.plot = axes, labels = c("observed",
  "missing", "imputed"), ...)
# Jittered scatterplot of the first two columns, Ozone and Solar.R.
VIM::scattJitt(airquality[, c("Ozone", "Solar.R")])

# Plain base-graphics scatterplot of the same two columns, for comparison.
plot(airquality[, c("Ozone", "Solar.R")])

Spineplot with information about missing/imputed values: spineplot or spinogram with highlighting of missing/imputed values in other variables by splitting each cell into two parts. Additionally, information about missing/imputed values in the variable of interest is shown on the right hand side.

spineMiss(x, delimiter = NULL, pos = 1, selection = c("any", "all"),
  breaks = "Sturges", right = TRUE, col = c("skyblue", "red", "skyblue4",
  "red4", "orange", "orange4"), border = NULL, main = NULL, sub = NULL,
  xlab = NULL, ylab = NULL, axes = TRUE, labels = axes,
  only.miss = TRUE, miss.labels = axes, interactive = TRUE, ...)
VIM::spineMiss(airquality[,c("Month","Solar.R")]) # spineplot /spinogram

## 
## Click in in the left margin to switch to the previous variable or in the right margin to switch to the next variable.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.

Scatterplot with information about missing/imputed values

In addition to a standard scatterplot, lines are plotted for the missing values in one variable. If there are imputed values, they will be highlighted.

scattMiss(x, delimiter = NULL, side = 1, col = c("skyblue", "red",
  "orange", "lightgrey"), alpha = NULL, lty = c("dashed", "dotted"),
  lwd = par("lwd"), quantiles = c(0.5, 0.975), inEllipse = FALSE,
  zeros = FALSE, xlim = NULL, ylim = NULL, main = NULL, sub = NULL,
  xlab = NULL, ylab = NULL, interactive = TRUE, ...)
VIM::scattMiss(airquality[,c("Ozone","Solar.R")]) # missing data are shown as lines

## 
## Click in bottom or left margin to change the 'side' argument accordingly.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.

Mosaic plot with information about missing/imputed values

Create a mosaic plot with information about missing/imputed values.

mosaicMiss(x, delimiter = NULL, highlight = NULL, selection = c("any",
  "all"), plotvars = NULL, col = c("skyblue", "red", "orange"),
  labels = NULL, miss.labels = TRUE, ...)

datayı duzelt

# Mosaic plot of columns 5:6 (Month, Day), highlighting missings in column 4.
# `highlight` is written out in full: the previous `high = 4` only reached the
# same argument through partial argument matching.
VIM::mosaicMiss(airquality, highlight = 4, plotvars = 5:6)

See also the help pages ?growdotMiss, ?colormapMiss, ?spineMiss, ?mosaicMiss, ?histMiss and ?mapMiss.

9.1 Amelia paketi ile gorsellestirme

Amelia::missmap(airquality)

9.2 naniar ile gorsellestirme

naniar::vis_miss(airquality)

naniar::gg_miss_upset(airquality)

#naniar::gg_miss_upset(riskfactors)
naniar::gg_miss_var(airquality)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

naniar::gg_miss_case(airquality)

#naniar::gg_miss_fct(airquality)